########################################
# Script for Stats in Enough Water Paper
# Date last edited:December 13 R&R
########################################
library(tidyverse)
library(srvyr)
library(survey)
    # load in survey data 
    load("/pooled_weighted_five.Rdata")
    
    # I use the weights straight out of tandeo 
    # set design matrix with weights
    dat_we <- dat %>% 
      as_survey(weights = c("design_weight", "ps_weights"))
    
   # proportion of households receiving water intermittently
    dat %>% 
      as_survey(weights = c("design_weight", "ps_weights")) %>% 
      group_by(intermittent) %>% 
      summarize(n= survey_total()) %>% 
      mutate(p = round(n/sum(n), 3) )
  
    # average hours of water/week    
    dat %>% 
      as_survey(weights = c("design_weight", "ps_weights")) %>% 
      summarize(water_hours= survey_mean(water_hours_weekly, na.rm=TRUE)) 
    
    # Storage percentages
    dat %>% 
      as_survey(weights = c("design_weight", "ps_weights")) %>% 
      summarize(n = survey_total(),
                n_cistern= survey_total(storage_cistern),
                n_tinaco = survey_total(storage_tinaco),
                n_tambos = survey_total(storage_tambos),
                n_any = survey_total(storage_any)) %>% 
      mutate(p_cistern = round(n_cistern/n, 3),
             p_tinaco = round(n_tinaco/n, 3),
             p_tambos = round(n_tambos/n, 3),
             p_any = round(n_any/n, 3))
    # 45% have cisterns
    # 57% have tinacos 
    #  22% use tambos 
    # 95% report using some form of storage
    dat$storage_days_cat <- factor(dat$storage_days_cat,
                                   levels = c("0", "1 day", "2-5 days", "5-7 days", "1-2 weeks", "2 weeks-1 month", "More than a month"),
                                    labels = c("0", "1 day", "2-5 days", "5-7 days", "1-2 weeks", "2 weeks \n -1 month", "More \n than \n a month"))
   p <-  dat %>% 
      as_survey(weights = c("design_weight", "ps_weights")) %>% 
      filter(!is.na(storage_days_cat)) %>% 
      group_by( storage_days_cat) %>% 
      summarize(n = survey_total()) %>% 
      mutate(p = round(n/sum(n), 3)) %>% 
      ggplot() +
      geom_bar(aes(x= storage_days_cat, y = p), stat = "identity") +
      theme_classic()+
      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size =20),
            axis.text.y = element_text(size =20),
            axis.title=element_text(size = 20),
            title = element_text(size = 20))+
      labs(x= "", y = "(Weighted) Percent of respondents") 
      
   ggsave(p,file= "storage_capacity.eps")
   

    
    # bring in census data on storage 
    census <- read_csv("4_Demographics/census_2020/principales_resultados_localidad.csv")
    # VPH_CISTER
    # VPH_TINACO
    # TVIVPARHAB
    as.numeric(census[census$LOC=="0000"& census$NOM_MUN =="Total de la entidad Ciudad de México",]$VPH_CISTER)/as.numeric(census[census$LOC=="0000"& census$NOM_MUN =="Total de la entidad Ciudad de México",]$TVIVPARHAB)
    # 60% have cistern
    as.numeric(census[census$LOC=="0000"& census$NOM_MUN =="Total de la entidad Ciudad de México",]$VPH_TINACO)/as.numeric(census[census$LOC=="0000"& census$NOM_MUN =="Total de la entidad Ciudad de México",]$TVIVPARHAB)
    
    
    # who drinks the tap water?
    dat %>% 
      as_survey(weights = c("design_weight", "ps_weights")) %>% 
      group_by(water_source_drink) %>% 
      summarize(n = survey_total() ) %>% 
      mutate(p = n/sum(n))
    
    # cuts to supply 
    
    dat %>% 
      as_survey(weights = c("design_weight", "ps_weights")) %>% 
      group_by(outage_yesno) %>% 
      summarize(n = survey_total() ) %>% 
      mutate(p = n/sum(n))

